索引操作
最近更新时间: 2024-10-17 17:10:00
索引创建
普通索引
postgres=# CREATE TABLE t_appoint(id int,name text) WITh (orientation = 'column');
CREATE TABLE
postgres=# CREATE INDEX t_appoint_id_idx ON t_appoint_col USING btree(id);
CREATE INDEX
唯一索引
创建唯一索引
postgres=# CREATE TABLE t_first_col_share (id int,nickname text) with (orientation = 'column');
CREATE TABLE
postgres=# CREATE UNIQUE INDEX t_first_col_share_id_uidx ON t_first_col_share using btree(id);
CREATE INDEX
非shard key字段不能建立唯一索引。
唯一索引必须带着分布键
postgres=# CREATE UNIQUE INDEX t_first_col_share_nickname_uidx on t_first_col_share using btree(nickname);
ERROR: Unique index of partitioned table must contain the hash/modulo distribution column.
表达式索引
postgres=# CREATE TABLE t_upper(id int,mc text) WITh (orientation = 'column');
NOTICE: Replica identity is needed for shard table, please add to this table through "" command.
CREATE TABLE
postgres=# INSERT INTO t_upper SELECT t,md5(t::text) FROM generate_series(1,10000) as t;
INSERT 0 10000
postgres=# ANALYZE t_upper;
ANALYZE
postgres=# EXPLAIN SELECT * FROM t_upper WHERE upper(mc)=md5('1');
QUERY PLAN
\------------------------------------------------------------------------
Remote Fast Query Execution (cost=0.00..0.00 rows=0 width=0)
Node/s: dn001, dn002
-> Seq Scan on t_upper (cost=0.00..135.58 rows=25 width=37)
Filter: (upper(mc) = 'c4ca4238a0b923820dcc509a6f75849b'::text)
(4 rows)
postgres=# CREATE INDEX t_upper_mc on t_upper(upper(mc));
CREATE INDEX
postgres=# EXPLAIN SELECT * FROM t_upper WHERE upper(mc)=md5('1');
QUERY PLAN
\-----------------------------------------------------------------------------------
Remote Fast Query Execution (cost=0.00..0.00 rows=0 width=0)
Node/s: dn001, dn002, dn003
-> Index Scan using t_upper_mc on t_upper (cost=0.28..32.58 rows=17 width=36)
Index Cond: (upper(mc) = 'c4ca4238a0b923820dcc509a6f75849b'::text)
(4 rows)
条件索引
postgres=# CREATE TABLE t_sex(id int,sex text) WITH (orientation = 'column');
NOTICE: Replica identity is needed for shard table, please add to this table through "" command.
CREATE TABLE
postgres=# CREATE INDEX t_sex_sex_idx on t_sex (sex);
CREATE INDEX
postgres=# INSERT INTO t_sex SELECT t,'男' FROM generate_series(1,1000000) as t;
INSERT 0 1000000
postgres=# INSERT INTO t_sex SELECT t,'女' FROM generate_series(1,100) as t;
INSERT 0 100
postgres=# ANALYZE t_sex ;
ANALYZE
postgres=# EXPLAIN SELECT * FROM t_sex WHERE sex ='女';
QUERY PLAN
----------------------------------------------------------------------------------
Remote Fast Query Execution (cost=0.00..0.00 rows=0 width=0)
Node/s: dn001, dn002
-> Index Scan using t_sex_sex_idx on t_sex (cost=0.42..5.81 rows=67 width=8)
Index Cond: (sex = '女'::text)
(4 rows)
\#索引对于条件为男的情况下无效
postgres=# EXPLAIN SELECT * FROM t_sex WHERE sex ='男';
QUERY PLAN
-------------------------------------------------------------------
Remote Fast Query Execution (cost=0.00..0.00 rows=0 width=0)
Node/s: dn001, dn002
-> Seq Scan on t_sex (cost=0.00..9977.58 rows=500539 width=8)
Filter: (sex = '男'::text)
(4 rows)
\#连接 dn 节点查看索引点用空间大,而且度数也高
[tdapg@VM_0_37_centos shell]$ psql -p 11010
psql (PostgreSQL 10.0 TDSQL-A for PostgreSQL)
Type "help" for help.
postgres=# \di+
List of relations
Schema | Name | Type | Owner | Table | Size | Allocated Size | Description
--------+---------------+-------+-------+---------+-------+----------------+-------------
dbadmin | t_sex_sex_idx | index | dbadmin | t_sex | 14 MB | 14 MB |
dbadmin | t_upper_mc | index | dbadmin | t_upper | 14 MB | 14 MB |
(2 rows)
postgres=# \q
[tdapg@VM_0_37_centos shell]$ psql
psql (PostgreSQL 10.0 TDSQL-A for PostgreSQL)
Type "help" for help.
postgres=# DROP INDEX t_sex_sex_idx;
DROP INDEX
postgres=# CREATE INDEX t_sex_sex_idx on t_sex (sex) WHERE sex='女';
CREATE INDEX
postgres=# ANALYZE t_sex;
ANALYZE
postgres=# EXPLAIN SELECT * FROM t_sex WHERE sex ='女';
QUERY PLAN
----------------------------------------------------------------------------------
Remote Fast Query Execution (cost=0.00..0.00 rows=0 width=0)
Node/s: dn001, dn002
-> Index Scan using t_sex_sex_idx on t_sex (cost=0.14..6.69 rows=33 width=8)
(3 rows)
postgres=# EXPLAIN SELECT * FROM t_sex WHERE sex ='男';
QUERY PLAN
-------------------------------------------------------------------
Remote Fast Query Execution (cost=0.00..0.00 rows=0 width=0)
Node/s: dn001, dn002
-> Seq Scan on t_sex (cost=0.00..9977.58 rows=500573 width=8)
Filter: (sex = '男'::text)
(4 rows)
postgres=# \q
[tdapg@VM_0_37_centos shell]$ psql -p 11010
psql (PostgreSQL 10.0 TDSQL-A for PostgreSQL)
Type "help" for help.
postgres=# \di+
List of relations
Schema | Name | Type | Owner | Table | Size | Allocated Size | Description
--------+---------------+-------+-------+---------+-------+----------------+-------------
dbadmin | t_sex_sex_idx | index | dbadmin | t_sex | 16 kB | 16 kB |
dbadmin | t_upper_mc | index | dbadmin| t_upper | 14 MB | 14 MB |
(2 rows)
postgres=#
gist 索引
postgres=# CREATE TABLE t_trgm (id int,trgm text,no_trgm text) ;
NOTICE: Replica identity is needed for shard table, please add to this table through "alter table" command.
CREATE TABLE
postgres=# CREATE INDEX t_trgm_trgm_idx ON t_trgm USING gist(trgm gist_trgm_ops);
CREATE INDEX
列存模式不支持 gist 索引。
gin 索引
postgres=# DROP INDEX t_trgm_trgm_idx;
DROP INDEX
postgres=# CREATE INDEX t_trgm_trgm_idx ON t_trgm USING gin(trgm gin_trgm_ops);
CREATE INDEX
列存模式不支持 gin 索引。
jsonb 索引
postgres=# CREATE TABLE t_jsonb(id int,f_jsonb jsonb);
NOTICE: Replica identity is needed for shard table, please add to this table through "alter table" command.
CREATE TABLE
postgres=# CREATE INDEX t_jsonb_f_jsonb_idx ON t_jsonb using gin(f_jsonb);
CREATE INDEX
列存模式不支持 jsonb 索引。
数组索引
postgres=# CREATE TABLE t_array(id int, mc text[]);
NOTICE: Replica identity is needed for shard table, please add to this table through "" command.
CREATE TABLE
postgres=# INSERT INTO t_array SELECT t,('{'||md5(t::text)||'}')::text[] FROM generate_series(1,1000000) as t;
INSERT 0 1000000
postgres=# ANALYZE;
ANALYZE
postgres=# \timing
Timing is on.
postgres=# EXPLAIN SELECT * FROM t_array WHERE mc @> ('{'||md5('1')||'}')::text[];
QUERY PLAN
---------------------------------------------------------------------------------------
Remote Fast Query Execution (cost=0.00..0.00 rows=0 width=0)
Node/s: dn001, dn002
-> Gather (cost=1000.00..12060.25 rows=2503 width=61)
Workers Planned: 2
-> Parallel Seq Scan on t_array (cost=0.00..10809.95 rows=1043 width=61)
Filter: (mc @> ('{c4ca4238a0b923820dcc509a6f75849b}'::cstring)::text[])
(6 rows)
Time: 4.105 ms
postgres=# SELECT * FROM t_array WHERE mc @> ('{'||md5('1')||'}')::text[];
id | mc
----+------------------------------------
1 | {c4ca4238a0b923820dcc509a6f75849b}
(1 row)
Time: 494.371 ms
postgres=# CREATE INDEX t_array_mc_idx ON t_array using gin(mc);
CREATE INDEX
Time: 8195.387 ms (00:08.195)
postgres=# EXPLAIN SELECT * FROM t_array WHERE mc @> ('{'||md5('1')||'}')::text[];
QUERY PLAN
-------------------------------------------------------------------------------------------
Remote Fast Query Execution (cost=0.00..0.00 rows=0 width=0)
Node/s: dn001, dn002
-> Bitmap Heap Scan on t_array (cost=29.40..3172.64 rows=2503 width=61)
Recheck Cond: (mc @> ('{c4ca4238a0b923820dcc509a6f75849b}'::cstring)::text[])
-> Bitmap Index Scan on t_array_mc_idx (cost=0.00..28.78 rows=2503 width=0)
Index Cond: (mc @> ('{c4ca4238a0b923820dcc509a6f75849b}'::cstring)::text[])
(6 rows)
Time: 1.716 ms
postgres=# SELECT * FROM t_array WHERE mc @> ('{'||md5('1')||'}')::text[];
id | mc
----+------------------------------------
1 | {c4ca4238a0b923820dcc509a6f75849b}
(1 row)
Time: 2.980 ms
列存模式不支持数组索引。
Btree_gin 任意字段索引
postgres=# CREATE TABLE gin_mul(f1 int, f2 int, f3 timestamp, f4 text, f5 numeric, f6 text);
NOTICE: Replica identity is needed for shard table, please add to this table through "" command.
CREATE TABLE
postgres=# INSERT INTO gin_mul SELECT random()*5000, random()*6000, now()+((30000-60000*random())||' sec')::interval , md5(random()::text), round((random()*100000)::numeric,2), md5(random()::text) FROM generate_series(1,1000000);
INSERT 0 1000000
postgres=# CREATE EXTENSION btree_gin;
CREATE EXTENSION
postgres=# CREATE INDEX gin_mul_gin_idx ON gin_mul using gin(f1,f2,f3,f4,f5,f6);
CREATE INDEX
#单字段查询
postgres=# EXPLAIN SELECT * FROM gin_mul WHERE f1=10;
QUERY PLAN
---------------------------------------------------------------------------------------
Remote Fast Query Execution (cost=0.00..0.00 rows=0 width=0)
Node/s: dn002
-> Bitmap Heap Scan on gin_mul (cost=11.51..369.70 rows=194 width=90)
Recheck Cond: (f1 = 10)
-> Bitmap Index Scan on gin_mul_gin_idx (cost=0.00..11.46 rows=194 width=0)
Index Cond: (f1 = 10)
(6 rows)
postgres=# EXPLAIN SELECT * FROM gin_mul WHERE f3='2019-02-18 23:01:01';
QUERY PLAN
-------------------------------------------------------------------------------------
Remote Fast Query Execution (cost=0.00..0.00 rows=0 width=0)
Node/s: dn001, dn002
-> Bitmap Heap Scan on gin_mul (cost=10.01..12.02 rows=1 width=90)
Recheck Cond: (f3 = '2019-02-18 23:01:01'::timestamp without time zone)
-> Bitmap Index Scan on gin_mul_gin_idx (cost=0.00..10.01 rows=1 width=0)
Index Cond: (f3 = '2019-02-18 23:01:01'::timestamp without time zone)
(6 rows)
postgres=# EXPLAIN SELECT * FROM gin_mul WHERE f4='2364d9969c8b66402c9b7d17a6d5b7d3';
QUERY PLAN
-------------------------------------------------------------------------------------
Remote Fast Query Execution (cost=0.00..0.00 rows=0 width=0)
Node/s: dn001, dn002
-> Bitmap Heap Scan on gin_mul (cost=10.01..12.02 rows=1 width=90)
Recheck Cond: (f4 = '2364d9969c8b66402c9b7d17a6d5b7d3'::text)
-> Bitmap Index Scan on gin_mul_gin_idx (cost=0.00..10.01 rows=1 width=0)
Index Cond: (f4 = '2364d9969c8b66402c9b7d17a6d5b7d3'::text)
(6 rows)
postgres=# EXPLAIN SELECT * FROM gin_mul WHERE f5=85375.30;
QUERY PLAN
-------------------------------------------------------------------------------------
Remote Fast Query Execution (cost=0.00..0.00 rows=0 width=0)
Node/s: dn001, dn002
-> Bitmap Heap Scan on gin_mul (cost=10.01..12.02 rows=1 width=90)
Recheck Cond: (f5 = 85375.30)
-> Bitmap Index Scan on gin_mul_gin_idx (cost=0.00..10.01 rows=1 width=0)
Index Cond: (f5 = 85375.30)
(6 rows)
#二个字段组合
postgres=# EXPLAIN SELECT * FROM gin_mul WHERE f1=2 and f3='2019-02-18 16:59:52.872523';
QUERY PLAN
-----------------------------------------------------------------------------------------------------------
Remote Fast Query Execution (cost=0.00..0.00 rows=0 width=0)
Node/s: dn001
-> Bitmap Heap Scan on gin_mul (cost=18.00..20.02 rows=1 width=90)
Recheck Cond: ((f1 = 2) AND (f3 = '2019-02-18 16:59:52.872523'::timestamp without time zone))
-> Bitmap Index Scan on gin_mul_gin_idx (cost=0.00..18.00 rows=1 width=0)
Index Cond: ((f1 = 2) AND (f3 = '2019-02-18 16:59:52.872523'::timestamp without time zone))
(6 rows)
#三字段组合查询
postgres=# EXPLAIN SELECT * FROM gin_mul WHERE f1=2 and f3='2019-02-18 16:59:52.872523' and f6='fa627dc16c2bd026150afa0453a0991d';
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------
Remote Fast Query Execution (cost=0.00..0.00 rows=0 width=0)
Node/s: dn001
-> Bitmap Heap Scan on gin_mul (cost=26.00..28.02 rows=1 width=90)
Recheck Cond: ((f1 = 2) AND (f3 = '2019-02-18 16:59:52.872523'::timestamp without time zone) AND (f6 = 'fa627dc16c2bd026150afa0453a0991d'::text))
-> Bitmap Index Scan on gin_mul_gin_idx (cost=0.00..26.00 rows=1 width=0)
Index Cond: ((f1 = 2) AND (f3 = '2019-02-18 16:59:52.872523'::timestamp without time zone) AND (f6 = 'fa627dc16c2bd026150afa0453a0991d'::text))
(6 rows)
多字段索引
postgres=# CREATE TABLE t_mul_idx (f1 int,f2 int,f3 int,f4 int);
NOTICE: Replica identity is needed for shard table, please add to this table through "" command.
CREATE TABLE
Time: 308.109 ms
postgres=# CREATE INDEX t_mul_idx_idx ON t_mul_idx(f1,f2,f3);
CREATE INDEX
Time: 108.734 ms
多字段使用注意事项
or 查询条件 bitmap scan 最多支持两个不同字段条件。
postgres=# INSERT INTO t_mul_idx SELECT t,t,t,t FROM generate_series(1,1000000) as t;
INSERT 0 1000000
postgres=# ANALYZE ;
ANALYZE
postgres=# EXPLAIN SELECT * FROM t_mul_idx WHERE f1=1 or f2=2 ;
QUERY PLAN
-------------------------------------------------------------------------------------------
Remote Fast Query Execution (cost=0.00..0.00 rows=0 width=0)
Node/s: dn001, dn002
-> Bitmap Heap Scan on t_mul_idx (cost=7617.08..7621.07 rows=2 width=16)
Recheck Cond: ((f1 = 1) OR (f2 = 2))
-> BitmapOr (cost=7617.08..7617.08 rows=2 width=0)
-> Bitmap Index Scan on t_mul_idx_idx (cost=0.00..2.43 rows=1 width=0)
Index Cond: (f1 = 1)
-> Bitmap Index Scan on t_mul_idx_idx (cost=0.00..7614.65 rows=1 width=0)
Index Cond: (f2 = 2)
(9 rows)
Time: 3.655 ms
postgres=# EXPLAIN SELECT * FROM t_mul_idx WHERE f1=1 or f2=2 or f1=3 ;
QUERY PLAN
-------------------------------------------------------------------------------------------
Remote Fast Query Execution (cost=0.00..0.00 rows=0 width=0)
Node/s: dn001, dn002
-> Bitmap Heap Scan on t_mul_idx (cost=7619.51..7625.49 rows=3 width=16)
Recheck Cond: ((f1 = 1) OR (f2 = 2) OR (f1 = 3))
-> BitmapOr (cost=7619.51..7619.51 rows=3 width=0)
-> Bitmap Index Scan on t_mul_idx_idx (cost=0.00..2.43 rows=1 width=0)
Index Cond: (f1 = 1)
-> Bitmap Index Scan on t_mul_idx_idx (cost=0.00..7614.65 rows=1 width=0)
Index Cond: (f2 = 2)
-> Bitmap Index Scan on t_mul_idx_idx (cost=0.00..2.43 rows=1 width=0)
Index Cond: (f1 = 3)
(11 rows)
Time: 3.429 ms
postgres=# EXPLAIN SELECT * FROM t_mul_idx WHERE f1=1 or f2=2 or f3=3 ;
QUERY PLAN
--------------------------------------------------------------------
Remote Fast Query Execution (cost=0.00..0.00 rows=0 width=0)
Node/s: dn001, dn002
-> Seq Scan on t_mul_idx (cost=0.00..12979.87 rows=3 width=16)
Filter: ((f1 = 1) OR (f2 = 2) OR (f3 = 3))
(4 rows)
Time: 1.679 ms
如果返回字段全部在索引文件中,则只需要扫描索引,IO 开销会更少。
postgres=# EXPLAIN SELECT f1,f2,f3 FROM t_mul_idx WHERE f1=1 ;
QUERY PLAN
-------------------------------------------------------------------------------------------
Remote Fast Query Execution (cost=0.00..0.00 rows=0 width=0)
Node/s: dn001
-> Index Only Scan using t_mul_idx_idx on t_mul_idx (cost=0.42..4.44 rows=1 width=12)
Index Cond: (f1 = 1)
(4 rows)
Time: 1.564 ms
更新性能比单字段多索引文件要好。
单字段:
postgres=# CREATE TABLE t_simple_idx (f1 int,f2 int,f3 int,f4 int);
NOTICE: Replica identity is needed for shard table, please add to this table through "alter table" command.
CREATE TABLE
Time: 25.926 ms
postgres=# CREATE INDEX t_simple_idx1 ON t_simple_idx(f1);
CREATE INDEX
Time: 31.568 ms
postgres=# CREATE INDEX t_simple_idx2 ON t_simple_idx(f2);
CREATE INDEX
Time: 26.315 ms
postgres=# CREATE INDEX t_simple_idx3 ON t_simple_idx(f3);
CREATE INDEX
Time: 23.997 ms
postgres=# INSERT INTO t_simple_idx SELECT t,t,t,t FROM generate_series(1,1000000) as t;
INSERT 0 1000000
Time: 4860.602 ms (00:04.861)
多字段索引走非第一字段查询时性能比独立的单字段差。
多字段:
postgres=# SELECT * FROM t_mul_idx WHERE f1=1;
f1 | f2 | f3 | f4
----+----+----+----
1 | 1 | 1 | 1
(1 row)
Time: 1.769 ms
postgres=# SELECT * FROM t_mul_idx WHERE f2=1;
f1 | f2 | f3 | f4
----+----+----+----
1 | 1 | 1 | 1
(1 row)
Time: 25.423 ms
postgres=# SELECT * FROM t_mul_idx WHERE f3=1;
f1 | f2 | f3 | f4
----+----+----+----
1 | 1 | 1 | 1
(1 row)
Time: 27.791 ms
独立字段:
postgres=# SELECT * FROM t_simple_idx WHERE f1=1;
f1 | f2 | f3 | f4
----+----+----+----
1 | 1 | 1 | 1
(1 row)
Time: 1.530 ms
postgres=# SELECT * FROM t_simple_idx WHERE f2=1;
f1 | f2 | f3 | f4
----+----+----+----
1 | 1 | 1 | 1
(1 row)
Time: 2.315 ms
postgres=# SELECT * FROM t_simple_idx WHERE f3=1;
f1 | f2 | f3 | f4
----+----+----+----
1 | 1 | 1 | 1
(1 row)
Time: 2.390 ms
索引修改
修改索引名称
postgres=# ALTER INDEX t_simple_idx1 RENAME TO t_simple_idx1_new;
ALTER INDEX
重建索引
postgres=# REINDEX INDEX t_simple_idx1_new;
REINDEX
删除索引
postgres=# DROP INDEX t_appoint_id_idx;
DROP INDEX